載入一些跟繪圖有關的套件

# setup
# Chunk defaults: hide package messages and use a 5 x 3 figure size.
knitr::opts_chunk$set(message=FALSE, fig.height=3, fig.width=5)
library(ggplot2) # to make pretty plots
library(dplyr) # for inner_join and left_join
# car is attached after dplyr, so recode() resolves to car's version
# rather than dplyr's.
library(car) # to recode
library(maps) # for the maps
library(gridExtra) # to arrange the plots in grids
library(plotly) # for interactive plots
library(cowplot) # to have get_legend

匯入csv資料以及看一下長怎樣,總共有三個檔案,分別代表2015、2016、2017三個年度

# Load the three yearly happiness tables (2015, 2016, 2017) from CSV files
# located in the working directory.
happy_2017 <- read.csv(file = "2017.csv")
happy_2016 <- read.csv(file = "2016.csv")
happy_2015 <- read.csv(file = "2015.csv")

# Inspect the structure (column names and types) of the 2015 table.
str(object = happy_2015)
'data.frame':   158 obs. of  12 variables:
 $ Country                      : Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
 $ Region                       : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
 $ Happiness.Rank               : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Happiness.Score              : num  7.59 7.56 7.53 7.52 7.43 ...
 $ Standard.Error               : num  0.0341 0.0488 0.0333 0.0388 0.0355 ...
 $ Economy..GDP.per.Capita.     : num  1.4 1.3 1.33 1.46 1.33 ...
 $ Family                       : num  1.35 1.4 1.36 1.33 1.32 ...
 $ Health..Life.Expectancy.     : num  0.941 0.948 0.875 0.885 0.906 ...
 $ Freedom                      : num  0.666 0.629 0.649 0.67 0.633 ...
 $ Trust..Government.Corruption.: num  0.42 0.141 0.484 0.365 0.33 ...
 $ Generosity                   : num  0.297 0.436 0.341 0.347 0.458 ...
 $ Dystopia.Residual            : num  2.52 2.7 2.49 2.47 2.45 ...

因到時會把三個dataframe整合成一個dataframe,所以重新命名三個年度的column名稱以利辨別

一樣的欄位名稱要標上年份表示不同年度

# Rename the columns so that year-specific measures carry a year suffix and
# stay distinguishable once the three tables are merged on "Country".
# Each vector is positional: it must list one name per column of its table,
# in that table's column order.
colnames(happy_2015) <- c(
  "Country", "Region", "Rank_2015", "Score_2015", "Err_2015", "GDP_2015",
  "Family_2015", "Health_2015", "Freedom_2015", "Gov_2015",
  "Generosity_2015", "Dist_res_2015"
)
colnames(happy_2016) <- c(
  "Country", "Region_2016", "Rank_2016", "Score_2016", "LCI_2016",
  "UCI_2016", "GDP_2016", "Family_2016", "Health_2016", "Freedom_2016",
  "Gov_2016", "Generosity_2016", "Dist_res_2016"
)
# The upper whisker of the 2017 table gets the "_2017" suffix (it was
# previously labelled "_2016" although it belongs to the 2017 data).
# Note that in this table Generosity is listed before Gov.
colnames(happy_2017) <- c(
  "Country", "Rank_2017", "Score_2017", "Whisker.high_2017",
  "Whisker.low_2017", "GDP_2017", "Family_2017", "Health_2017",
  "Freedom_2017", "Generosity_2017", "Gov_2017", "Dist_res_2017"
)

利用merge函數,接合三個dataframe成新的一個

查看前6筆資料

# Fold the three yearly tables into one wide table keyed on "Country".
# merge() with its default settings keeps only countries present in every
# table.
happy <- Reduce(
  function(left, right) merge(left, right, by = "Country"),
  list(happy_2015, happy_2016, happy_2017)
)
# The 2015 "Region" column is kept; the 2016 copy is dropped.
happy[["Region_2016"]] <- NULL
# Preview the first six rows.
head(happy)
      Country                          Region Rank_2015 Score_2015
1 Afghanistan                   Southern Asia       153      3.575
2     Albania      Central and Eastern Europe        95      4.959
3     Algeria Middle East and Northern Africa        68      5.605
4      Angola              Sub-Saharan Africa       137      4.033
5   Argentina     Latin America and Caribbean        30      6.574
6     Armenia      Central and Eastern Europe       127      4.350
  Err_2015 GDP_2015 Family_2015 Health_2015 Freedom_2015 Gov_2015
1  0.03084  0.31982     0.30285     0.30335      0.23414  0.09719
2  0.05013  0.87867     0.80434     0.81325      0.35733  0.06413
3  0.05099  0.93929     1.07772     0.61766      0.28579  0.17383
4  0.04758  0.75778     0.86040     0.16683      0.10384  0.07122
5  0.04612  1.05351     1.24823     0.78723      0.44974  0.08484
6  0.04763  0.76821     0.77711     0.72990      0.19847  0.03900
  Generosity_2015 Dist_res_2015 Rank_2016 Score_2016 LCI_2016 UCI_2016
1         0.36510       1.95210       154      3.360    3.288    3.432
2         0.14272       1.89894       109      4.655    4.546    4.764
3         0.07822       2.43209        38      6.355    6.227    6.483
4         0.12344       1.94939       141      3.866    3.753    3.979
5         0.11451       2.83600        26      6.650    6.560    6.740
6         0.07855       1.75873       121      4.360    4.266    4.454
  GDP_2016 Family_2016 Health_2016 Freedom_2016 Gov_2016 Generosity_2016
1  0.38227     0.11037     0.17344      0.16430  0.07112         0.31268
2  0.95530     0.50163     0.73007      0.31866  0.05301         0.16840
3  1.05266     0.83309     0.61804      0.21006  0.16157         0.07044
4  0.84731     0.66366     0.04991      0.00589  0.08434         0.12071
5  1.15137     1.06612     0.69711      0.42284  0.07296         0.10989
6  0.86086     0.62477     0.64083      0.14037  0.03616         0.07793
  Dist_res_2016 Rank_2017 Score_2017 Whisker.high_2016 Whisker.low_2017
1       2.14558       141      3.794          3.873661         3.714338
2       1.92816       109      4.644          4.752464         4.535536
3       3.40904        53      5.872          5.978286         5.765714
4       2.09459       140      3.795          3.951642         3.638358
5       3.12985        24      6.599          6.690085         6.507915
6       1.97864       121      4.376          4.466735         4.285265
   GDP_2017 Family_2017 Health_2017 Freedom_2017 Generosity_2017
1 0.4014772   0.5815433  0.18074678    0.1061795      0.31187093
2 0.9961928   0.8036852  0.73115975    0.3814986      0.20131294
3 1.0918645   1.1462175  0.61758465    0.2333358      0.06943665
4 0.8584282   1.1044120  0.04986867    0.0000000      0.09792649
5 1.1852955   1.4404511  0.69513708    0.4945192      0.10945706
6 0.9005967   1.0074837  0.63752443    0.1983033      0.08348809
    Gov_2017 Dist_res_2017
1 0.06115783      2.150801
2 0.03986422      1.490442
3 0.14609611      2.567604
4 0.06972034      1.614482
5 0.05973989      2.614005
6 0.02667442      1.521499

更改一些國家名字

# Align country names with the region names used by the "world" map data so
# that the later join on "Country" finds them.
# Congo (Brazzaville) is the Republic of Congo and Congo (Kinshasa) is the
# Democratic Republic of the Congo; mapping them the other way round would
# paint each country with the other's score.
# car::recode is called explicitly because dplyr also exports a recode()
# with a different interface.
happy$Country <- car::recode(
  happy$Country,
  "'Congo (Brazzaville)' = 'Republic of Congo';
   'Congo (Kinshasa)' = 'Democratic Republic of the Congo';
   'United States' = 'USA';
   'United Kingdom' = 'UK'"
)

因第一欄(國家名)是屬於factor,所以要將它改成字串資料,才能做成地圖

# Show the class of the first column (the country names).
class(happy[[1]])
[1] "factor"
# Convert the first column (country names) to plain character strings.
happy[[1]] <- as.character(happy[[1]])

製作面量圖,每個國家的幸福指數比較

# Choropleth of the 2017 happiness score.
# Polygon outlines for every country in the "world" map.
w <- map_data("world")
# Rename the map's country-name column by name rather than by position, so
# the join key stays correct even if the column order of the map data changes.
stopifnot("region" %in% names(w))
names(w)[names(w) == "region"] <- "Country"
# Keep only the polygons of countries that have happiness data.
myw <- inner_join(w, happy, by = "Country")
worldplot <- ggplot(data = w, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1) +
  # Grey base layer: countries without data stay grey.
  geom_polygon(color = "black", fill = "gray") +
  # Countries with data, filled by score (x, y and group are inherited).
  geom_polygon(data = myw, aes(fill = Score_2017), color = "white") +
  # Redraw all borders in black on top of the filled layer.
  geom_polygon(color = "black", fill = NA) +
  theme_bw() +
  ggtitle("Happiness Score in 2017 in the World") +
  scale_fill_distiller(palette = "Spectral")
worldplot

Compute the percentage variation of the happiness score between each pair of years (2015→2016, 2016→2017, and 2015→2017)

# Percentage change of the happiness score for each pair of years,
# relative to the earlier year of the pair.
happy[["var_2016_2015"]] <- with(
  happy, 100 * (Score_2016 - Score_2015) / Score_2015
)
happy[["var_2017_2016"]] <- with(
  happy, 100 * (Score_2017 - Score_2016) / Score_2016
)
happy[["var_2017_2015"]] <- with(
  happy, 100 * (Score_2017 - Score_2015) / Score_2015
)

比較看看他們年份間的波動

# Per-region scatter of the 2015 -> 2017 score variation, coloured by the
# 2017 score; the `text` aesthetic supplies the country name to the
# interactive plot.
pv <- ggplot(
  happy,
  aes(x = var_2017_2015, y = Region, text = paste("country:", Country))
) +
  geom_point(aes(color = Score_2017), size = 3, alpha = 0.6) +
  scale_colour_distiller(palette = "Spectral") +
  labs(x = "Happiness variation between 2015 and 2017 [%]") +
  theme_bw()
# scale_colour_gradientn(colours=rainbow(3)) # a different colour scheme I tested, but in the end I like Spectral better
# Convert the static ggplot into an interactive plotly widget and display it.
pv <- ggplotly(pv)
pv

可以看出2017幸福指數越高的國家,波動越小;反之,波動較大

Are “Happy” People Really Happy?

在此我們要看他們的幸福指數跟他的行為跡象是否相關

下載Kaggle dataset 65 World Indexes並匯入

# Load the world-indexes table and join it to the happiness data.
kaggle <- read.csv(file = "Kaggle.csv")
# The first column is used as the join key, so give it the same name as in
# `happy`.
names(kaggle)[1] <- "Country"
# Keeps only countries present in both tables.
merged <- merge(happy, kaggle, by = "Country")

謀殺率與幸福指數

# Homicide rate against the 2015 happiness score, points coloured by region,
# with a smoothed trend line drawn over the points.
ggplot(
  merged,
  aes(x = Score_2015, y = Homicide.rate.per.100k.people.2008.2012)
) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Homicide Rate per 100k people 2008-2012"
  ) +
  theme_bw()

可以看出其實有地區性的差異

自殺率與幸福指數(分男女)

# Female suicide rate against the 2015 happiness score, coloured by region.
srf <- ggplot(
  merged,
  aes(x = Score_2015, y = Female.Suicide.Rate.100k.people)
) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Male suicide rate against the 2015 happiness score, coloured by region.
srm <- ggplot(
  merged,
  aes(x = Score_2015, y = MaleSuicide.Rate.100k.people)
) +
  geom_point(aes(color = Region), size = 3, alpha = 0.6) +
  geom_smooth() +
  labs(
    x = "Happiness Score in 2015",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Show both panels without their own legends, plus one shared legend taken
# from the male plot, side by side in three columns.
grid.arrange(
  srf + theme(legend.position = "none"),
  srm + theme(legend.position = "none"),
  get_legend(srm),
  ncol = 3
)

也是一樣,每個洲合在一起看看不出所以然,因他還是有地區性差異

「西歐」的自殺率與幸福指數

# Restrict the analysis to the "Western Europe" region.
europe <- merged[merged$Region == "Western Europe", ]

# Female suicide rate vs. 2015 happiness score; layers are drawn in the order
# trend line, country labels, then points coloured by 2015 GDP.
srfe <- ggplot(
  europe,
  aes(x = Score_2015, y = Female.Suicide.Rate.100k.people)
) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Same plot for the male suicide rate.
srme <- ggplot(
  europe,
  aes(x = Score_2015, y = MaleSuicide.Rate.100k.people)
) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Two legend-free panels plus one shared legend (from the male plot) in a
# narrower third column.
grid.arrange(
  srfe + theme(legend.position = "none"),
  srme + theme(legend.position = "none"),
  get_legend(srme),
  ncol = 3,
  widths = c(3, 3, 1)
)

可以看出無論男女,幸福指數越高的國家,每10萬人的自殺率也越高

在此我們要檢視溫度與幸福指數是否相關

下載Climate Change: Earth Surface Temperature Data dataset並匯入

# Load the per-country temperature table and keep the January 2013 records.
weather_all <- read.csv(
  "GlobalLandTemperaturesByCountry.csv",
  fileEncoding = "UTF-8"
)
weather_2013_01 <- weather_all[weather_all$dt == "2013-01-01", ]

# Use the "Denmark (Europe)" record as the record for "Denmark".
# Copying the whole row over the "Denmark" row would also copy the name
# "Denmark (Europe)", leaving no row called "Denmark" and silently dropping
# the country from the merge below. Instead, drop the original "Denmark" row
# and rename the Europe-only row.
# NOTE(review): presumably the plain "Denmark" record includes overseas
# territory, which is why the Europe-only values are preferred — confirm
# against the dataset description.
weather_2013_01$Country <- as.character(weather_2013_01$Country)
if ("Denmark (Europe)" %in% weather_2013_01$Country) {
  weather_2013_01 <- weather_2013_01[
    !(weather_2013_01$Country %in% "Denmark"),
  ]
  weather_2013_01$Country[
    weather_2013_01$Country %in% "Denmark (Europe)"
  ] <- "Denmark"
}

# Attach the temperatures to the happiness/index table; only countries present
# in both tables are kept.
merged <- merge(merged, weather_2013_01, by = "Country")

溫度與幸福指數

# Re-select Western Europe from the table that now includes temperatures.
europe <- merged[merged$Region == "Western Europe", ]

# Average temperature vs. 2015 happiness score; layers are drawn in the order
# points (coloured by 2015 GDP), trend line, then country labels.
happy_temp <- ggplot(europe, aes(x = Score_2015, y = AverageTemperature)) +
  geom_point(aes(color = GDP_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Happiness Score in 2015",
    y = "Average Temperature Jan 2013"
  ) +
  theme_bw()
happy_temp

可以看出越冷國家,幸福指數越高,不過若合併自殺率來看…

溫度與自殺率

# Male suicide rate vs. average temperature, points coloured by the 2015
# happiness score.
pm <- ggplot(
  europe,
  aes(x = AverageTemperature, y = MaleSuicide.Rate.100k.people)
) +
  geom_point(aes(color = Score_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Average Temperature Jan 2013",
    y = "Male Suicide Rate per 100k people"
  ) +
  theme_bw()

# Same plot for the female suicide rate.
pf <- ggplot(
  europe,
  aes(x = AverageTemperature, y = Female.Suicide.Rate.100k.people)
) +
  geom_point(aes(color = Score_2015), size = 4, alpha = 0.6) +
  geom_smooth() +
  geom_text(aes(label = Country)) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    x = "Average Temperature Jan 2013",
    y = "Female Suicide Rate per 100k people"
  ) +
  theme_bw()

# Female panel, male panel, and one shared legend (from the female plot) in a
# narrower third column.
grid.arrange(
  pf + theme(legend.position = "none"),
  pm + theme(legend.position = "none"),
  get_legend(pf),
  ncol = 3,
  widths = c(3, 3, 1)
)

最後可以看出越冷的國家,自殺率也會越高

結論:幸福指數是一個國家各個面向的平均狀態,數字越高代表整體狀況越佳,但不代表適用於國內的每一個個案。由上面的自殺率來看,可以得到意想不到的結果:幸福指數越高的國家(如西北歐國家),自殺率反而越高,這可能是高度發展國家所面臨的問題。